{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 用户和活动关联关系处理\n",
    "\n",
    "\n",
    "整个数据集中活动数目（events.csv）太多，所以下面的处理我们找出只在训练集和测试集中出现的活动和用户集合，并对他们重新编制索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#保存数据\n",
    "import cPickle\n",
    "\n",
    "import itertools\n",
    "\n",
    "#处理事件字符串\n",
    "import datetime\n",
    "\n",
    "import numpy as np\n",
    "import scipy.io as sio\n",
    "import scipy.sparse as ss\n",
    "\n",
    "#相似度/距离\n",
    "import scipy.spatial.distance as ssd\n",
    "\n",
    "from collections import defaultdict\n",
    "from sklearn.preprocessing import normalize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of uniqueUsers :3391\n",
      "number of uniqueEvents :13418\n"
     ]
    }
   ],
   "source": [
    " \"\"\"\n",
    "我们只关心train和test中出现的user和event，因此重点处理这部分关联数据\n",
    "\n",
    "train.csv 有6列：\n",
    "user：用户ID\n",
    "event：活动ID\n",
    "invited：是否被邀请（0/1）\n",
    "timestamp：ISO-8601 UTC格式时间字符串，表示用户看到该活动的时间\n",
    "interested, and not_interested\n",
    "\n",
    "Test.csv 除了没有interested, and not_interested，其余列与train相同\n",
    " \"\"\"\n",
    "    \n",
    "# 统计训练集中有多少不同的用户的events\n",
    "uniqueUsers = set()\n",
    "uniqueEvents = set()\n",
    "\n",
    "#倒排表\n",
    "#统计每个用户参加的活动   / 每个活动参加的用户\n",
    "eventsForUser = defaultdict(set)\n",
    "usersForEvent = defaultdict(set)\n",
    "    \n",
    "for filename in [\"C:/Users/860730/Desktop/hw4/train.csv\", \"C:/Users/860730/Desktop/hw4/test.csv\"]:\n",
    "    f = open(filename, 'rb')\n",
    "    \n",
    "    #忽略第一行（列名字）\n",
    "    f.readline().strip().split(\",\")\n",
    "    \n",
    "    for line in f:    #对每条记录\n",
    "        cols = line.strip().split(\",\")\n",
    "        uniqueUsers.add(cols[0])   #第一列为用户ID\n",
    "        uniqueEvents.add(cols[1])   #第二列为活动ID\n",
    "        \n",
    "        #eventsForUser[cols[0]].add(cols[1])    #该用户参加了这个活动\n",
    "        #usersForEvent[cols[1]].add(cols[0])    #该活动被用户参加\n",
    "    f.close()\n",
    "\n",
    "\n",
    "n_uniqueUsers = len(uniqueUsers)\n",
    "n_uniqueEvents = len(uniqueEvents)\n",
    "\n",
    "print(\"number of uniqueUsers :%d\" % n_uniqueUsers)\n",
    "print(\"number of uniqueEvents :%d\" % n_uniqueEvents)\n",
    "\n",
    "#用户关系矩阵表，可用于后续LFM/SVD++处理的输入\n",
    "#这是一个稀疏矩阵，记录用户对活动感兴趣\n",
    "userEventScores = ss.dok_matrix((n_uniqueUsers, n_uniqueEvents))\n",
    "userIndex = dict()\n",
    "eventIndex = dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "#重新编码用户索引字典\n",
    "for i, u in enumerate(uniqueUsers):\n",
    "    userIndex[u] = i\n",
    "    \n",
    "#重新编码活动索引字典    \n",
    "for i, e in enumerate(uniqueEvents):\n",
    "    eventIndex[e] = i\n",
    "\n",
    "n_records = 0\n",
    "ftrain = open(\"C:/Users/860730/Desktop/hw4/train.csv\", 'rb')\n",
    "ftrain.readline()\n",
    "for line in ftrain:\n",
    "    cols = line.strip().split(\",\")\n",
    "    i = userIndex[cols[0]]  #用户\n",
    "    j = eventIndex[cols[1]] #活动\n",
    "    \n",
    "    eventsForUser[i].add(j)    #该用户参加了这个活动\n",
    "    usersForEvent[j].add(i)    #该活动被用户参加\n",
    "        \n",
    "    #userEventScores[i, j] = int(cols[4]) - int(cols[5])   #interested - not_interested\n",
    "    score = int(cols[4])\n",
    "    #if score == 0:  #0在稀疏矩阵中表示该元素不存在，因此借用-1表示interested=0\n",
    "    #userEventScores[i, j] = -1\n",
    "    #else:\n",
    "    userEventScores[i, j] = score\n",
    "ftrain.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "##统计每个用户参加的活动，后续用于将用户朋友参加的活动影响到用户\n",
    "cPickle.dump(eventsForUser, open(\"C:/Users/860730/Desktop/hw4/PE_eventsForUser.pkl\", 'wb'))\n",
    "##统计活动参加的用户\n",
    "cPickle.dump(usersForEvent, open(\"C:/Users/860730/Desktop/hw4/PE_usersForEvent.pkl\", 'wb'))\n",
    "\n",
    "#保存用户-活动关系矩阵R，以备后用\n",
    "sio.mmwrite(\"C:/Users/860730/Desktop/hw4/PE_userEventScores\", userEventScores)\n",
    "\n",
    "\n",
    "#保存用户索引表\n",
    "cPickle.dump(userIndex, open(\"C:/Users/860730/Desktop/hw4/PE_userIndex.pkl\", 'wb'))\n",
    "#保存活动索引表\n",
    "cPickle.dump(eventIndex, open(\"C:/Users/860730/Desktop/hw4/PE_eventIndex.pkl\", 'wb'))\n",
    "\n",
    "    \n",
    "# 为了防止不必要的计算，我们找出来所有关联的用户 或者 关联的event\n",
    "# 所谓的关联用户，指的是至少在同一个event上有行为的用户pair\n",
    "# 关联的event指的是至少同一个user有行为的event pair\n",
    "uniqueUserPairs = set()\n",
    "uniqueEventPairs = set()\n",
    "for event in uniqueEvents:\n",
    "    i = eventIndex[event]\n",
    "    users = usersForEvent[i]\n",
    "    if len(users) > 2:\n",
    "        uniqueUserPairs.update(itertools.combinations(users, 2))\n",
    "        \n",
    "for user in uniqueUsers:\n",
    "    u = userIndex[user]\n",
    "    events = eventsForUser[u]\n",
    "    if len(events) > 2:\n",
    "        uniqueEventPairs.update(itertools.combinations(events, 2))\n",
    " \n",
    "#保存用户-事件关系对索引表\n",
    "cPickle.dump(uniqueUserPairs, open(\"C:/Users/860730/Desktop/hw4/FE_uniqueUserPairs.pkl\", 'wb'))\n",
    "cPickle.dump(uniqueEventPairs, open(\"C:/Users/860730/Desktop/hw4/PE_uniqueEventPairs.pkl\", 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#训练集和测试集中出现的用户数目和事件数目远小于users.csv出现的用户数和events.csv出现的事件数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "eventIndex = cPickle.load(open(\"C:/Users/860730/Desktop/hw4/PE_eventIndex.pkl\",\"rb\")) \n",
    "n_events = len(eventIndex)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "fin = open(\"C:/Users/860730/Desktop/hw4/events.csv\")\n",
    "fin.readline()\n",
    "eventPropMatrix = ss.dok_matrix((n_events, 7))\n",
    "eventContMatrix = ss.dok_matrix((n_events, 101))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "for line in fin.readlines():\n",
    "    cols=line.strip().split(\",\")\n",
    "    eventId = str(cols[0])    \n",
    "    if eventIndex.has_key(eventId):\n",
    "        i = eventIndex[eventId]\n",
    "        for j in range(9,110):\n",
    "             eventContMatrix[i,j-9] = cols[j]\n",
    "fin.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "训练集和测试集中出现的用户数目和事件数目以及count集　＞　eventContMatrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import normalize\n",
    "eventContMatrix = normalize(eventContMatrix, norm=\"l2\", axis=0, copy=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "用L2統一模長"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('k:', 10, '/n', 'ch_score:', 595.3078891431494)\n",
      "('k:', 20, '/n', 'ch_score:', 361.18923634814035)\n",
      "('k:', 30, '/n', 'ch_score:', 271.97509111352514)\n",
      "('k:', 40, '/n', 'ch_score:', 224.97245871124846)\n",
      "('k:', 50, '/n', 'ch_score:', 192.02310317592725)\n",
      "('k:', 60, '/n', 'ch_score:', 172.20201922780663)\n",
      "('k:', 70, '/n', 'ch_score:', 157.04774401867644)\n",
      "('k:', 80, '/n', 'ch_score:', 144.54948169239609)\n",
      "('k:', 90, '/n', 'ch_score:', 133.72101975376196)\n",
      "('k:', 100, '/n', 'ch_score:', 124.42349424898106)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.cluster import KMeans\n",
    "from sklearn.metrics import calinski_harabaz_score\n",
    "\n",
    "K=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]\n",
    "def cluster(data,k):\n",
    "    kmeans = KMeans(k).fit(data)\n",
    "    ch_score = calinski_harabaz_score(data.toarray(),kmeans.predict(data))\n",
    "    return ch_score \n",
    "\n",
    "for i in K:\n",
    "    ch_score = cluster(eventContMatrix,i)\n",
    "    print('k:',i, '/n' , 'ch_score:',ch_score)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "聚类、CH_scores计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "ch_score_list = [595.3078891431494,361.18923634814035,271.97509111352514,271.97509111352514,192.02310317592725,172.20201922780663,\n",
    "                157.04774401867644,144.54948169239609,133.72101975376196,124.42349424898106]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x3e734940>]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD7CAYAAABkO19ZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xl8XWW97/HPb+/Mc5MmaVI6l45pAyVARQ6DVpkaOHo9ePRIgYr1KOq9Hi/OqBeP16scPYAoh2Kx6JWXCipSRlEoM6Up0LmlE21pkmZqmrSZs5/zx14JSWlJmiZZe/i+X6+8mv3stXd+WYTvWvtZz/Msc84hIiKxK+B3ASIiMrIU9CIiMU5BLyIS4xT0IiIxTkEvIhLjFPQiIjFOQS8iEuMU9CIiMU5BLyIS4xL8LgBg7NixbvLkyX6XISISVdatW1fnnMsfaLuICPrJkydTUVHhdxkiIlHFzPYOZjt13YiIxDgFvYhIjFPQi4jEuEEFvZlNMLOHzewZM3vKzBaY2UQze8LMXjKz1WY2yds2ycxWeO2vmdmikf0VRETkvQz2Yuxy4GvOuQ1mlgsEgfuBO5xzq8zscuBOoBy4CWh0zp1nZuOB1WZW4pxrH4lfQERE3tuAZ/RmNg5IAZaa2XPAD4CjwCzn3CoA59xjQImZJQGLgbu99gPAy8D5I1O+iIgMZDBdNxOBM4H7nHMXAJXAbUDtMdvVAHneV3Wf9iqg4Ng3NbNlZlZhZhW1tce+lYiIDJfBBH0jsMU597r3+EHgdMKB3lc+UAccpH+wj/Pa+nHOLXfOlTnnyvLzBxzvf1yv7zvEj57YNqTXiojEi8EE/U4gxcxmeY8XAeuAjWZ2KYB3wXWzc64T+Atwg9deCCwEXhzuwgE2HTjMXat3sb26eSTeXkQkJgwY9M65EHAdcJeZPQ9cBHwfuBH4mpm9CHwT+IL3kjuA8Wa2BlgF3DhSF2Ivm1dEwGDV+sqReHsRkZgwqFE3zrkNwMXHNB8+ThvOuQ7gmlMvbWBjM5J5//SxrNpQyVc+PAMzG40fKyISVaJ+wlT5/GL21rew8cBhv0sREYlIUR/0l8wdR2LQePgNdd+IiBxP1Ad9dloiF84o4JENVYRCzu9yREQiTtQHPUB5aRHVTW1U7D3kdykiIhEnJoJ+0exCUhIDGn0jInIcMRH06ckJfHB2IY9trKKrO+R3OSIiESUmgh7Co2/qj3bw8u56v0sREYkoMRP0F83MJyM5Qd03IiLHiJmgT0kM8uG5hTyxqZr2rm6/yxERiRgxE/QA5aXFNLV18fybdX6XIiISMWIq6M+fPpactERWbVD3jYhIj5gK+sRggMtKinhqy0FaO9R9IyICMRb0EJ481dLRzdPbavwuRUQkIsRc0J87JY+CzGQeXn/A71JERCJCzAV9MGBcMb+IZ7bX0tTW6Xc5IiK+i7mgh/Dom46uEE9tftcdDEVE4k5MBv2ZE3IYn5Oq0TciIsRo0JsZ5aXFvLCjjoajHX6XIyLiq5gMegiPvukKOZ7YVO13KSIivorZoJ9TlMXU/HStfSMicS9mg97MKJ9fzCt76qlpavO7HBER38Rs0EO4+8Y5eHRjld+liIj4JqaDfnpBJrOLstR9IyJxLaaDHsJn9a/ta2R/Q4vfpYiI+CL2g35+MQCPbFD3jYjEp5gP+gm5aZw5MUfdNyISt2I+6CF8Vr+lqomdNUf8LkVEZNTFRdBfMb8IM3hESyKISByKi6AvzErh3Cm5rFpfiXPO73JEREZVXAQ9hFe03FV7lK1VzX6XIiIyquIm6C8rKSIYMK1oKSJxJ26CPjc9ifOnj1X3jYjEnbgJegh337x9qJU39jf6XYqIyKiJq6D/8NxCkoIBVq3X5CkRiR+DCnozu8/MXjGz1d7XlWY20cyeMLOXvLZJ3rZJZrbCa3/NzBaN7K8weFkpiVw0M59HNlTSHVL3jYjEh4RBbjcBuNg519rTYGZPAXc451aZ2eXAnUA5cBPQ6Jw7z8zGA6vNrMQ51z7cxQ9FeWkxf91ykLVvNbBwap7f5YiIjLjBdt3kAHeZ2XNmdqeZpQGznHOrAJxzjwElZpYELAbu9toPAC8D5w9/6UPzwdkFpCUFeVhLIohInBhs0FcANzvnLgBqgZ97//ZVA+R5X33v31cFFBz7hma2zMwqzKyitvbYtxo5aUkJLJpdyOMbq+jsDo3azxUR8cuggt45t8w5t997+AAwmXCg95UP1AEH6R/s47y2Y99zuXOuzDlXlp+ff7J1n5Ly0mIOtXTy4s66Uf25IiJ+GDDozSzVzL7vdcsAXEb4DH+jmV3qbbMI2Oyc6wT+AtzgtRcCC4EXR6L4obpgxlgyUxI0+kZE4sKAF2Odc61mVge8amaHgQPAZ4FcYKWZ3Qy0A9d7L7kDWGFmawADboyUC7E9khOCXDp3HE9sqqats4SUxKDfJYmIjJhBjbpxzt0O3H5MczNw8XG27QCuOfXSRlZ5aTEPrHubZ9+s5ZK54/wuR0RkxMTVhKm+zpuWR256km5IIiIxL26DPiEY4PJ54/j71hpaOrr8LkdEZMTEbdBD+M5TrZ3d/G1rjd+liIiMmLgO+rMn51KYlazuGxGJaXEd9IGAsXh+Mc9ur+Vwa6ff5YiIjIi4DnqAK0uL6egO8dfN1QNvLCISheI+6Oefls3E3DStfSMiMSvug97MKC8t4qVd9dQdiah5XSIiwyLugx7Ck6e6Q47HN6n7RkRij4IemFmYyekFGRp9IyIxSUFPT/dNMWvfaqDqcOvALxARiSIKes/i+UU4B49u0IqWIhJbFPSeqfkZlIzPYpWCXkRijIK+j/L5xazf38i++ha/SxERGTYK+j6umF8EwKoNuigrIrFDQd/HaWPSOGvSGI2+EZGYoqA/Rvn8IrZVN7PjYLPfpYiIDAsF/TEun19EwNBFWRGJGQr6YxRkpvC+aXmsWl+Jc87vckRETpmC/jjK5xezp+4omyub/C5FROSUKeiP49KScSQETBdlRSQmKOiPIyctiQtm5PPIhipCIXXfiEh0U9CfQHlpEQcaW3l9/yG/SxEROSUK+hNYNLuQ5IQAq9Zr9I2IRDcF/QlkpiTygVkFPLKhim5134hIFFPQv4fy0mLqjrSzZne936WIiAyZgv49XDyzgPSkoNa+EZGopqB/D6lJQT40p5DHN1XT0RXyuxwRkSFR0A/gyjOKaWzp5MWddX6XIiIyJAr6AZw/PZ/s1EQe1uQpEYlSCvoBJCUEuKxkHH/dXE1bZ7ff5YiInDQF/SCUlxZztKObZ7bV+F2KiMhJU9APwsKpeYzNSNboGxGJSgr6QQgGjCvmjePvW2s40t7ldzkiIiflpILezG42s9Xe96Vm9qyZvWJmq8xsjNeeY2Z/NLOXzGyNmZ0xAnWPuvLSYtq7Qvxty0G/SxEROSmDDnozKwOmeN8b8DvgS865hcDjwC3eprcCq51z5wGfAVYOZ8F+WTBxDMXZKVq6WESizqCC3sxSgduAr3tNM4BDzrn13uNfAld431/uPcY5twFoMrNpw1axTwIBY3FpMc/tqKWxpcPvckREBm2wZ/S3Arc553qGneQB1T1POuc6gATvYYJzrrXPa6uAgmPf0MyWmVmFmVXU1taefOU+KJ9fTGe348nN1QNvLCISIQYMejO7BBjjnHuwT/NB+oS3mSUDPae5rd7jHuO87ftxzi13zpU558ry8/OHVPxoKxmfxeS8NC1dLCJRZTBn9IuBfDN7yMweAkqA7wIZZlbibXMN4X56gEeA6wHMbDaQ6ZzbPbxl+8PMKC8t5qVdddQ2t/tdjojIoCQMtIFz7ot9H5vZaufcEm80zT1mFgLqgWu9TW4G7jOzawEHLB3mmn11ZWkxP3t6J49vqmLJ+yb7XY6IyIAGDPpjOecu8v59A3jfcZ4/BFx5ypVFqNMLM5k1LpOH36hU0ItIVNCEqSEoLy2mYu8hDjS2DryxiIjPFPRDsHh+EQCPakkEEYkCCvohmJSXTulp2Rp9IyJRQUE/ROWlxWw8cJg9dUf9LkVE5D0p6IfoCq/75hEtiSAiEU5BP0RF2amcMzlXSxeLSMRT0J+C8tIi3jx4hO3VzX6XIiJyQgr6U3DZvCIChla0FJGIpqA/BWMzknn/9LGs2lCJc87vckREjktBf4rKS4vZW9/CxgOH/S5FROS4FPSn6JK540gMmrpvRCRiKehPUXZqIhfNLOB3a/fz5kFdlBWRyKOgHwbfWTyH1MQg1977KpVa/0ZEIoyCfhhMyE3jvqXncKStiyX3vqpbDYpIRFHQD5PZRVncc20Z+xpa+PR9FbR2dPtdkogIoKAfVgun5nH7x8/gtX2H+ML9r9HVHfK7JBERBf1wu2xeEbdcVcLft9XwzT9v1Ph6EfHdSd9hSgZ2zcJJ1Da1ccfTOynITOF/XzLT75JEJI4p6EfIlz80g9oj7dz5zE7yM5O59rzJfpckInFKQT9CzIzvX1VC3ZEOvrdqM2MzknuXNhYRGU3qox9BCcEAP/vEmZRNGsOXf/8GL+2q87skEYlDCvoRlpIY5JdLzmby2DSW/Xodmyu1Jo6IjC4F/SjITkvkvqXnkJWSwHW/Wsv+hha/SxKROKKgHyVF2an8+tPn0NEVYsm9r1J/pN3vkkQkTijoR9H0gkzuva6MqsOtLF25lqPtXX6XJCJxQEE/ys6alMudn1jApsomPvfb1+jo0uxZERlZCnofLJpTyA8/Oo/n3qzla3/cQCik2bMiMnI0jt4nV5dNoLa5nVuf3M7YjCS+dcUcv0sSkRiloPfR5y+aRk1TG/c8v4eCzBQ+c8FUv0sSkRikoPeRmfGd8rnUHengB49tZWxmEh858zS/yxKRGKOg91kwYPz046U0HO3gpgc2kJuezIUz8v0uS0RiiC7GRoDkhCB3LzmLGYWZfO7/r2P9/ka/SxKRGKKgjxBZKYmsXHo2eRlJXL9yLbtrj/hdkojECAV9BCnITOHXS8/FgCX3vkpNU5vfJYlIDBhU0JvZV83sJTN7zczuNbMkM5toZk947avNbJK3bZKZreiz/aKR/RViy5Sx6fzq+rNpONrBtb9aS1Nbp98liUiUGzDozWwskA283zm3AEgDrgJWAD93zp0H/Bi403vJTUCj114O3GVmySNRfKyaf1oO//Wps9hxsJllv66gvUs3GheRoRsw6J1zdc65bznnnJllEA79LcAs59wqb5vHgBIzSwIWA3d77QeAl4HzR+oXiFUXzMjnP/6plFd2N/Bvv19Pt2bPisgQDbqP3sx+C+wB/g40ArXHbFID5Hlf1X3aq4CC47zfMjOrMLOK2tpj30oA/vHM8Xz7itk8urGKW1Zt1o3GRWRIBj2O3jn3L2aWBvwGaCIc6H3lA3XAQcLB3uS1j/Pajn2/5cBygLKyMiXYCdzwD1OpaW5n+XO7KchK4caLp/tdkohEmcH00Z9hZtcCOOdagDcJ99NvNLNLvW0WAZudc53AX4AbvPZCYCHw4siUHx++fuksPnLmeG59cju/X7vP73JEJMoM5ox+O/A5M/si0Aq8DXwf+DOw0sxuBtqB673t7wBWmNkawIAbnXO6y8YpCASMH39sPvVHO/jGnzaSl57MojmFfpclIlHCIqHft6yszFVUVPhdRsQ72t7FJ+95hW3Vzdz/mXM5a1Ku3yWJiI/MbJ1zrmyg7TRhKoqkJydw73VnU5yTytKVFew42Ox3SSISBRT0USYvI5lfLz2HpIQAS+59larDrX6XJCIRTkEfhSbkpnHf9edwpK2LJStepbGlw++SRCSCKeij1JziLJYvKWNvfQs33FdBW6dmz4rI8Snoo9j7puVx2z+fwbp9h/jC/a/T1a0bjYvIuynoo9zl84r4P1fO5W9bD/LthzZp9qyIvIvuMBUDlrxvMjVN7dz5zE7Wv32Y1ER/j9/BgHH9+6dw+bwiX+sQkTAFfYz4yodnkJQQYO1bDX6XwoHGVr5w/2v85OpS3QNXJAIo6GOEmfGlD57udxkAtHR0sXTlWr7yh/U4Bx9doLAX8ZP66GXYpSUl8KvrzmHh1Dy+8sB6Hlz3tt8licQ1Bb2MiNSkICuuPZvzpuVx04PreaBiv98licQtBb2MmJ6wP3/6WL76xw38Ya3CXsQPCnoZUSmJQe5ZUtYb9r97Vcssi4w2Bb2MuJ6wv3BGPl//00buX6OwFxlNCnoZFSmJQe6+5iwumpnPN/+8kd+u2et3SSJxQ0Evo6Yn7D8wq4Bv/XkTv3lFYS8yGhT0MqqSE4Lc9akFfHBWATc/tIlfv/yW3yWJxDwFvYy65IQgv/jUAhbNLuQ7f9nMyhf3+F2SSExT0IsvkhOC/OJfFvChOYV8b9UW7n1BYS8yUhT04pukhAA//+QCLplbyC2PbGGFwl5kRCjoxVdJCQHu/OQCLisZx/cf2cIvn9/td0kiMUdBL75LDAa44xNncvm8cfz7o1u55zmFvchw0uqVEhESgwFu/+czMXuDHzy2lZBzfPbCaX6XJRITFPQSMRKDAW7/+BkEzPjh49sIOfjcRQp7kVOloJeIkhAM8J9Xl2LAj57YRsg5brx4ut9liUQ1Bb1EnIRggJ9eXUrA4NYnt+Oc4wsfiIybqohEIwW9RKSEYICfXB3uxvmPv75JyBExd9ASiTYKeolYwYBx6z+VgsFPn3qTkHP8r0Uz/C5LJOoo6CWiBQPGrR8rJWDGbX/bQcjBlxedjpn5XZpI1FDQS8QLBowf/4/5BAzu+PsOcI4vf2iGwl5kkBT0EhUCAeP/fXQ+ATPueHonIQdf+bDCXmQwFPQSNQIB4/9+ZB5mcOczOwk5x02XzFTYiwxAQS9RJRAwfvCP8zAzfrF6Fw74qsJe5D0p6CXqBALGv19VQsDgrtW7CDnH1y+dpbAXOYFBBb2ZXQ18GegCqoDrgNOBO4BkoBZY4pw7ZGY5wAqgCAgCn3XOvTH8pUs8CwSM719VgmHc/exunINvXKawFzmeAVevNLNc4KvAB5xz/wDsBT4D/A74knNuIfA4cIv3kluB1c6587ztVo5A3SKYGbdcNZdr3zeJ5c/t5gePbsU553dZIhFnwDN651yDmZ3vnGvr85o24JBzbr3X9ktgG/BF4HLgS95rN5hZk5lNc87tGv7yJd6ZGd+7ci5mxi9f2EPIwc2LZ+vMXqSPQXXdOOfazCwF+BHhrppNQHWf5zvMrOe9EpxzrX1eXgUUAP2C3syWAcsAJk6cOORfQMTM+G75HMzg3hf3EHLOe6ywF4HB99GfBtwD3OGce9zMphEO757nk4EO72GrmSU759q9x+OAg8e+p3NuObAcoKysTJ+35ZSYGd9ZPIeAGSte2INzrvdMXyTeDRj03pn8SuB659x+AOfcLjPLMLMS59wm4BrC/fQAjwDXA/9lZrOBTOecbhkkI87M+PYVswkY3PN8uBvnW1fMJiUx6HdpIr4azBn9ImA28Js+Z0dPEx55c4+ZhYB64FrvuZuB+8zsWsABS4ezYJH3YmZ88/LZBMy4+7nd/H7tfkonZHPulDzOnZrLWZPGkJakUcUSXywSRimUlZW5iooKv8uQGOKc44Wddbywo45X9jSw6cBhukOOhIAx77R3gr9s0hgyUxL9LldkSMxsnXOubMDtFPQSD460d7Fu7yHW7K5nzZ4GNrzdSGe3I2BQMj6bc6fkcu6UPM6ekkt2qoJfooOCXuQ9tHZ089q+cPC/sqeBN/Y10tEdwgxmj8vi3Knh4D93Si5j0pP8LlfkuBT0IiehrbObN/Y3smZ3A2v21PPavkO0dYYAmFmY2Rv850zJJT8z2edqRcIU9CKnoKMrxIa3G1mzp4FXdtezbu8hWjq6AZiWn865U8Nn+wun5lGYleJztRKvFPQiw6izO8SmA4dZs6eBNbvrqXjrEM3tXQBMzkvrvbh77tQ8xuek+lytxAsFvcgI6uoOsbWqmTV76nlldwNr32rgcGsnAONzUlk4NRz8C6fkMSE3VRO3ZEQo6EVGUSjk2FYdDv41uxt49a0GGo6GJ4sXZiUzb3w2c4qymFOcxZyibIW/DIvBBr1mjogMg0DAwiFenMX1759CKOTYWXsk3M2z9xBbKpt4elsNIe+8KjM5gdk9wV+cxZyiLE4vzCA5QbN4ZfjpjF5klLR1drO9upnNlU1sqTrMlsomtlY109oZvsibEDCmF2QwpziLucXeJ4CiLLLTNK5fjk9n9CIRJiUxSOmEHEon5PS2dYcce+uPsqWqiS2VTWyubOL5HXX86bUDvduMz0ntPesPHwSyGJ+jrh8ZPAW9iI+CAWNqfgZT8zNYPL+4t72muY2tVc1sqWxiS1UTmysP87etB+n5AJ6VktDb399zEJhekEFSwoD3EpI4pKAXiUAFmSkUZKZw4Yz83raWji62Vb8T/lsqm7j/1b29E7uSggFOL8zoc9E3i9nFWWRpLZ+4p6AXiRJpSQksmDiGBRPH9LZ1hxx76o54/f5NvRd9H1j3du82E3JTmVuUzayiTKblZzAtP4MpY9NJTdKF33ihoBeJYsGAMb0gk+kFmVx1xnggvHJnbXM7m73g7/kE8OSW6t6uHzMozk5lWkEG0/LTmZof/nd6fgb5mcnq/48xCnqRGGNmFGSlUJCVwsUze28ER1tnN3vqjrKr9gi7ao6yu+4Iu2qPsHZPQ+/IHwgP/Zyanx4+++9zIJiUl6bhn1FKQS8SJ1ISg8wuymJ2UVa/9lDIUd3Uxu5a7yDgfb28u54/vf7O6J+AwcTctN6z/54DwdSx6eSmJ+lTQART0IvEuUDAKM5JpTgnlfNPH9vvuSPtXeyp9c7+a46wyzsYvLCzjo6uUO92OWmJXv9/TzdQ+PsJuWkkBjUSyG8KehE5oYzkBOadls2807L7tXeHHJWNreysPfLOJ4GaIzy9rZY/VLxzITgxaEzMTet39j81P52JuemMzdCngNGioBeRkxYMGBNy05iQm8bFM/s/d7i1k9214bP/3b1dQUd5ZnsNnd3vzMRPTQwyMTeNiXlpTMxNY1Je+P0m5qZx2phUXQ8YRgp6ERlW2amJnDlxDGf2GQYK4RU/9zW0sLe+pd+/++pbeH5Hbe98AAiPCirKSuk9CIQPCOnhA0JuGjlpifo0cBIU9CIyKhKCgd5ZwMdyzlF7pJ19fQ4C+xvC3z+zvZba5vZ+22cmJzDB+xTQ71NBbjpFOSm6LnAMBb2I+M7MemcDl03OfdfzLR1d7G9oDX8CaGhhX/1R9jW0sP1gM3/fWkNH9zufBoIBozgnhUm56f0PBt4BIR5nCivoRSTipSUlMHNcJjPHZb7ruVDIcbC5rV9XUM8B4cnN1b33BeiRk5bIpNw0xo9JpTg7laKcVMbnpFCUHR55lJeeRCAQW91CCnoRiWqBgFGUnUpRdvjOXsdqbutkX8M7XUE9B4Rt1c08va2m37UBCK8ZVJST4h0EUhifk+odBFJ6h6FmJEdXdEZXtSIiJykzJZG5xdnMLc5+13POORpbOjnQ2ErV4TYqG1upPNxKZWMbVY2tvLKrnoPN7XSH3DHvmeAdAN4J/2Lv4FCck0phVkpErSSqoBeRuGVmjElPYkx6EiXj330ggPBooZrmdqoOt3KgMXwwqGoMf191uJU39jdyqKXzmPeF/IzkfgcAP7uIFPQiIu8hIRjoPWs/a9Lxt2nt6KbycCtVjX0/FYQ/JWyrbuaZbbX91hMCSEoIUJSdwr99aEbvgnQj9juM6LuLiMSB1KRg7xLQx/NeXUR56ckjXp+CXkRkhA2mi2gkRc7VAhERGREKehGRGKegFxGJcQp6EZEYp6AXEYlxAwa9mX3MzP5gZvv6tE00syfM7CUzW21mk7z2JDNb4bW/ZmaLRrJ4EREZ2GDO6GuBzwNJfdpWAD93zp0H/Bi402u/CWj02suBu8xs5AeJiojICQ0Y9M65Z51zdT2PzSwNmOWcW+U9/xhQYmZJwGLgbq/9APAycP5IFC4iIoMzlAlTOYTP8vuqAfK8r+o+7VVAwfHexMyWAcu8h0fMbPsQaokkY4G6AbeKH9of79C+6E/7o79T2R8nWJShv6EEfR3hQO8r32s/SDjYm7z2cV7buzjnlgPLh/DzI5KZVTjnyvyuI1Jof7xD+6I/7Y/+RmN/nPSoG+dcB7DRzC4F8C64bnbOdQJ/AW7w2guBhcCLw1euiIicrKGudXMjsNLMbgbageu99juAFWa2BjDgRudc+wneQ0RERsGgg945N67P93uBi4+zTQdwzfCUFnViphtqmGh/vEP7oj/tj/5GfH+Yc27grUREJGppZqyISIxT0A+RmV1tZi+b2fPezOE0Mys1s2fN7BUzW2VmY/yuczSZ2c1mttr7Pm73hZlNMLOHzewZM3vKzBacaDZ5rDOzb5rZq2b2opk9YGaZ8fS3ETErCzjn9HWSX0AuUAGkeo9vBf4nsBUo9do+D/zM71pHcZ+UAfcCqwlfiI/nffE4ML/P30o+8BRQ7rVdDqzyu85R2A/zgDVA0Hv8n4Rnz8fN3wZwIeFx8tV92o77twB8C/iJ9/14YAeQPBx16Ix+CJxzDcD5zrlWrykBaAMOOefWe22/BK7wo77RZmapwG3A172mGcTvvhgHpABLzew54AfAUU48mzyW1REeldcz6CNIeI5N3PxtuAhZWUBBP0TOuTYzSzGz24FUYBN9ZgW78AikeLlV463Abc65Gu9xvxnScbYvJgJnAvc55y4AKgkfBE80mzxmOeeqCK+D9Qsz+wZwiPj+/wSGaWWBk6WgHyIzOw34M/CEc+5fCf8HKujzfDLQ4VN5o8bMLgHGOOce7NPcM0O6Z5u42BeeRmCLc+517/GDwOmceDZ5zDKzi4ELnHOfds79ENgM/Cvx+7cBg1tZoMcJVxY4WQr6ITCzFGAlsMw59ziAc24XkGFmJd5m1xDuq411i4F8M3vIzB4CSoDvEp/7AmAnkGJms7zHi4B1nHg2eSybBfRdvTaJ8Nl7vP5t9HyCGfWVBTSOfgjMrKcvbUef5qeBh4G7gBBQD1zrnDs0+hX6x8xWO+cuMrMziNN9YWbzgdsJh1oNsJTwR/aVhMOuHbjehScexiwzSwd+AZx8EpaXAAAAYUlEQVQFHAZaCQdZDnH2t2Fm1c6bdOqNslnJMX8LXj/9CsLXuAz4pnPub8Py8xX0IiKxTV03IiIxTkEvIhLjFPQiIjFOQS8iEuMU9CIiMU5BLyIS4xT0IiIxTkEvIhLj/htUgphunYtjZQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x9fe2160>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(K, ch_score_list)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "打印結果　k=10時､calinski_harabaz_score最大"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
